POS Distribution

Chisheng Li

Tokenize the SOTU speech collections using the NLTK package and then write a program that compute the frequency of words and POS. Compare the speeches based on the following properties:

  • vocabulary size (number of unique words);
  • frequency of stopwords;
  • number of capital letters;
  • average number of characters per word;
  • number of nouns, adjectives, verbs, adverbs, and pronouns;
  • the top 10 nouns, top 10 verbs, and top 10 adjectives.

In [1]:
import csv, re
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from itertools import groupby
from math import log

POS Distribution for the entire SOTU speech


In [2]:
def pos_stats(filename):
    pattern = re.compile("[A-Z]{1}")
    sum_stats= dict();
    
    with open ('stopwords.txt') as stop:
        stop_words = [w.strip().lower() for w in stop]
        
        with open(filename) as text:            
            text_string = text.read()
            text_words = word_tokenize(text_string)
            text_words.sort()
            
            # output: (word, frequency)
            # Word frequency count with upper case words
            word_freq_cap = [(key, len(list(group))) for key, group in groupby(text_words, lambda x: x)]
            
            target = open('POS output/' + filename + '_withCap.txt', 'w')
            target.write(str(word_freq_cap))
            
            # Word frequency count without upper case words
            word_freq = [(key, len(list(group))) for key, group in groupby(text_words, lambda x: x.lower())]
            
            target = open('POS output/' + filename + '_noCap.txt', 'w')
            target.write(str(word_freq))
            
            # filter out stop words
            word_filtered = [f for f in word_freq if f[0] not in stop_words]
            
            # vocabulary size with and without stop words
            sum_stats["vocabulary size"] = [len(word_freq), len(word_filtered)]
            
            # stopword frequency
            stop_freq = 0;
            # total word frequency
            total_freq = 0;
            
            for w, frq in word_freq:
                if w in stop_words:
                    stop_freq += frq
                total_freq += frq
            
            sum_stats["stop word frequency"] = stop_freq/float(total_freq)
            
            # number of capital letters            
            # average number of character per word
            total_chars = 0;
            total_words = 0;
            total_capital = 0;
            
            for word, freq in word_freq_cap:
                total_chars += len(word)*freq
                total_words += freq
                total_capital += freq * len(pattern.findall(word))
            
            if total_chars != 0 and total_words != 0:
                sum_stats["avg chars per word"] = total_chars/float(total_words)
                sum_stats["number of capital letters"] = total_capital
                
            # POS tagging, without filtering stop words 
            pos_info = None
            pos_info = pos_tag(word_tokenize(text_string.lower()))
            
            target = open('POS output/' + filename + 'pos_noCap.txt', 'w')
            target.write(str(pos_info))
                
            pos_counts = dict();
            cross_walk = {'NN':'noun', 'JJ':'adjectives', 
                          'VB':'verbs', 'RB':'adverbs', 'PRP':'pronouns'}
            
            for word, pos in pos_info:
                pos_type = cross_walk.get(pos);
                if pos:
                    pos_counts[pos_type] = pos_counts.get(pos_type, 0) + 1
                else:
                    None
                    
            sum_stats["POS counts"] = pos_counts
            
            pos_info_sort = sorted (pos_info, key= lambda x: (x[1], x[0]))
            pos_word_counts = [(k, len(list(group))) for k, group in groupby(pos_info_sort, lambda x: x )]

            # desired output: ((word, pos) counts)  
            # but the problem is that pos does not combine        
            tmp1 = sorted(pos_word_counts, key = lambda x: x[0][1])
            tmp2 = [(k, sorted(group, key = lambda x: x[1], reverse=True)) for k, group in groupby(tmp1, lambda x: x[0][1])]
            
            pos_list , word_count = zip(*tmp2)
            
            topwords_by_pos = dict()
            
            for pos_type in pos_list:
                ts1 = word_count[pos_list.index(pos_type)]
                cnt = 0
                ts2 = list();
                for word_tup in ts1:
                    if not (cnt < 10):
                        break
                    ts2.append((word_tup[0][0], word_tup[1]))
                    cnt += 1
                topwords_by_pos[cross_walk.get(pos_type)] = ts2
            
            sum_stats["top words by pos"] = topwords_by_pos
            
            # output: (frequency, # of unique word)
            word_sorted = sorted(word_filtered, key = lambda x: x[1])
            cnt_word = [(key, len(list(group))) for key, group in groupby(word_sorted, lambda x: x[1])]
            
            log_word = [(log(x), log(y)) for x, y in cnt_word]

            target = open('POS output/' + filename + 'summary.txt', 'w')
            target.write(str(sum_stats))

        return sum_stats

In [3]:
pos_stats('sotu1790-2015.txt')


Out[3]:
{'POS counts': {None: 1265036,
  'adjectives': 139661,
  'adverbs': 66323,
  'noun': 337705,
  'pronouns': 55203,
  'verbs': 73184},
 'avg chars per word': 4.564443726671668,
 'number of capital letters': 176503,
 'stop word frequency': 0.5033321397207772,
 'top words by pos': {None: [('``', 1028)],
  'adjectives': [('such', 4032),
   ('other', 3664),
   ('great', 3178),
   ('new', 2939),
   ('last', 2592),
   ('national', 2135),
   ('american', 1878),
   ('own', 1824),
   ('many', 1715),
   ('foreign', 1577)],
  'adverbs': [('not', 9381),
   ('so', 3283),
   ('now', 3049),
   ('only', 2043),
   ('also', 1750),
   ('well', 1371),
   ('just', 1160),
   ('still', 1036),
   ('already', 927),
   ('far', 905)],
  'noun': [('government', 6930),
   ('congress', 4840),
   ('year', 3866),
   ('country', 3381),
   ('war', 2736),
   ('time', 2689),
   ('world', 2425),
   ('state', 2316),
   ('law', 2047),
   ('nation', 2033)],
  'pronouns': [('it', 15481),
   ('we', 12340),
   ('i', 9379),
   ('they', 5010),
   ('them', 3654),
   ('us', 2430),
   ('you', 2203),
   ('he', 1335),
   ('me', 896),
   ('themselves', 670)],
  'verbs': [('be', 18617),
   ('have', 2248),
   ('make', 1582),
   ('do', 1078),
   ('give', 797),
   ('take', 741),
   ('meet', 670),
   ('provide', 667),
   ('continue', 661),
   ('help', 634)]},
 'vocabulary size': [34148, 33493]}

By Presidents

George Washington (1790-1796)


In [4]:
pos_stats('1GW.txt')


Out[4]:
{'POS counts': {None: 12082,
  'adjectives': 1158,
  'adverbs': 567,
  'noun': 3000,
  'pronouns': 518,
  'verbs': 762},
 'avg chars per word': 4.658926300657931,
 'number of capital letters': 1312,
 'stop word frequency': 0.5343064079172887,
 'top words by pos': {None: [('``', 4)],
  'adjectives': [('such', 39),
   ('other', 37),
   ('general', 25),
   ('national', 24),
   ('last', 21),
   ('public', 21),
   ('further', 19),
   ('necessary', 19),
   ('great', 18),
   ('military', 17)],
  'adverbs': [('not', 110),
   ('so', 22),
   ('too', 20),
   ('however', 16),
   ('well', 16),
   ('yet', 15),
   ('also', 14),
   ('now', 13),
   ('far', 12),
   ('even', 11)],
  'noun': [('government', 47),
   ('country', 33),
   ('public', 33),
   ('state', 32),
   ('peace', 29),
   ('present', 28),
   ('congress', 27),
   ('house', 26),
   ('war', 26),
   ('part', 24)],
  'pronouns': [('it', 157),
   ('i', 104),
   ('you', 69),
   ('them', 58),
   ('they', 39),
   ('we', 28),
   ('me', 21),
   ('us', 14),
   ('he', 8),
   ('itself', 6)],
  'verbs': [('be', 243),
   ('have', 40),
   ('afford', 8),
   ('add', 6),
   ('prevent', 6),
   ('lay', 5),
   ('merit', 5),
   ('render', 5),
   ('secure', 5),
   ('engage', 4)]},
 'vocabulary size': [3016, 2736]}

John Adams (1797-1800)


In [5]:
pos_stats('2JA.txt')


Out[5]:
{'POS counts': {None: 5198,
  'adjectives': 507,
  'adverbs': 216,
  'noun': 1288,
  'pronouns': 238,
  'verbs': 307},
 'avg chars per word': 4.681882656350742,
 'number of capital letters': 616,
 'stop word frequency': 0.5245647969052224,
 'top words by pos': {None: [('when', 9), ('where', 4), ('how', 1)],
  'adjectives': [('great', 22),
   ('such', 16),
   ('necessary', 15),
   ('national', 13),
   ('other', 13),
   ('foreign', 11),
   ('british', 7),
   ('last', 7),
   ('public', 7),
   ('considerable', 6)],
  'adverbs': [('not', 36),
   ('so', 16),
   ('now', 9),
   ('still', 7),
   ('far', 5),
   ('however', 5),
   ('also', 4),
   ('confidently', 4),
   ('just', 4),
   ('then', 4)],
  'noun': [('commerce', 22),
   ('treaty', 22),
   ('country', 21),
   ('government', 19),
   ('article', 14),
   ('congress', 14),
   ('france', 13),
   ('war', 13),
   ('house', 12),
   ('part', 12)],
  'pronouns': [('it', 75),
   ('i', 42),
   ('we', 30),
   ('they', 23),
   ('you', 21),
   ('them', 14),
   ('us', 11),
   ('me', 8),
   ('he', 3),
   ('itself', 3)],
  'verbs': [('be', 89),
   ('have', 7),
   ('give', 6),
   ('ascertain', 4),
   ('continue', 4),
   ('secure', 4),
   ('assemble', 3),
   ('avoid', 3),
   ('bring', 3),
   ('consider', 3)]},
 'vocabulary size': [1700, 1470]}

Thomas Jefferson (1801-1808)


In [6]:
pos_stats('3TJ.txt')


Out[6]:
{'POS counts': {None: 15169,
  'adjectives': 1406,
  'adverbs': 805,
  'noun': 3533,
  'pronouns': 710,
  'verbs': 913},
 'avg chars per word': 4.5762967564449575,
 'number of capital letters': 1298,
 'stop word frequency': 0.5383591427430447,
 'top words by pos': {None: [('when', 25), ('where', 21), ('how', 9)],
  'adjectives': [('other', 58),
   ('necessary', 50),
   ('last', 44),
   ('such', 37),
   ('great', 33),
   ('new', 28),
   ('own', 23),
   ('same', 23),
   ('foreign', 21),
   ('general', 20)],
  'adverbs': [('not', 106),
   ('so', 47),
   ('now', 35),
   ('also', 31),
   ('well', 22),
   ('still', 21),
   ('however', 20),
   ('already', 19),
   ('only', 18),
   ('soon', 18)],
  'noun': [('country', 56),
   ('peace', 55),
   ('time', 47),
   ('congress', 44),
   ('year', 39),
   ('state', 38),
   ('part', 34),
   ('public', 34),
   ('war', 34),
   ('debt', 32)],
  'pronouns': [('it', 165),
   ('them', 101),
   ('we', 101),
   ('they', 81),
   ('i', 74),
   ('us', 71),
   ('you', 70),
   ('themselves', 12),
   ('itself', 10),
   ('me', 7)],
  'verbs': [('be', 305),
   ('have', 34),
   ('give', 14),
   ('pay', 11),
   ('maintain', 10),
   ('meet', 10),
   ('call', 9),
   ('receive', 9),
   ('make', 8),
   ('require', 8)]},
 'vocabulary size': [3155, 2864]}

James Madison (1809-1816)


In [7]:
pos_stats('4JM.txt')


Out[7]:
{'POS counts': {None: 15814,
  'adjectives': 1673,
  'adverbs': 736,
  'noun': 3983,
  'pronouns': 506,
  'verbs': 680},
 'avg chars per word': 4.690975932971401,
 'number of capital letters': 1461,
 'stop word frequency': 0.5304150814346172,
 'top words by pos': {None: [('where', 18), ('when', 9), ('how', 4)],
  'adjectives': [('other', 71),
   ('british', 61),
   ('such', 51),
   ('great', 50),
   ('national', 42),
   ('american', 41),
   ('foreign', 25),
   ('last', 24),
   ('military', 24),
   ('general', 23)],
  'adverbs': [('not', 137),
   ('also', 37),
   ('so', 36),
   ('well', 33),
   ('now', 21),
   ('just', 19),
   ('however', 16),
   ('particularly', 15),
   ('only', 14),
   ('far', 13)],
  'noun': [('war', 85),
   ('government', 62),
   ('public', 53),
   ('congress', 49),
   ('country', 45),
   ('enemy', 43),
   ('peace', 36),
   ('treasury', 33),
   ('force', 32),
   ('nation', 32)],
  'pronouns': [('it', 224),
   ('them', 73),
   ('i', 49),
   ('they', 45),
   ('we', 24),
   ('us', 23),
   ('itself', 17),
   ('you', 17),
   ('he', 13),
   ('themselves', 6)],
  'verbs': [('be', 239),
   ('have', 46),
   ('give', 8),
   ('require', 7),
   ('afford', 6),
   ('fail', 6),
   ('meet', 6),
   ('defray', 5),
   ('find', 5),
   ('accept', 4)]},
 'vocabulary size': [3407, 3137]}

James Monroe (1817-1824)


In [8]:
pos_stats('5JM.txt')


Out[8]:
{'POS counts': {None: 31316,
  'adjectives': 3078,
  'adverbs': 1381,
  'noun': 7381,
  'pronouns': 1288,
  'verbs': 1579},
 'avg chars per word': 4.544700080384105,
 'number of capital letters': 3541,
 'stop word frequency': 0.5404418953268592,
 'top words by pos': {None: [('``', 4)],
  'adjectives': [('other', 161),
   ('great', 160),
   ('such', 115),
   ('last', 94),
   ('important', 50),
   ('necessary', 48),
   ('several', 46),
   ('new', 44),
   ('first', 43),
   ('whole', 43)],
  'adverbs': [('not', 219),
   ('so', 71),
   ('also', 45),
   ('now', 38),
   ('just', 37),
   ('very', 34),
   ('only', 32),
   ('however', 31),
   ('well', 28),
   ('still', 27)],
  'noun': [('government', 166),
   ('congress', 106),
   ('spain', 93),
   ('treaty', 77),
   ('year', 73),
   ('public', 72),
   ('state', 71),
   ('war', 71),
   ('power', 65),
   ('commerce', 64)],
  'pronouns': [('it', 655),
   ('them', 143),
   ('we', 119),
   ('they', 113),
   ('i', 100),
   ('us', 34),
   ('he', 26),
   ('themselves', 22),
   ('you', 21),
   ('itself', 16)],
  'verbs': [('be', 563),
   ('have', 62),
   ('make', 25),
   ('give', 19),
   ('fail', 14),
   ('likewise', 13),
   ('believe', 12),
   ('extend', 12),
   ('meet', 12),
   ('preserve', 12)]},
 'vocabulary size': [4039, 3728]}

John Quincy Adams (1825-1828)


In [9]:
pos_stats('6JQA.txt')


Out[9]:
{'POS counts': {None: 23264,
  'adjectives': 2366,
  'adverbs': 971,
  'noun': 5614,
  'pronouns': 664,
  'verbs': 858},
 'avg chars per word': 4.619112506665087,
 'number of capital letters': 2456,
 'stop word frequency': 0.5335031696190533,
 'top words by pos': {None: [('``', 2)],
  'adjectives': [('other', 95),
   ('last', 92),
   ('great', 71),
   ('first', 53),
   ('own', 50),
   ('same', 42),
   ('commercial', 36),
   ('national', 29),
   ('general', 28),
   ('navy', 27)],
  'adverbs': [('not', 111),
   ('so', 51),
   ('now', 34),
   ('only', 33),
   ('yet', 30),
   ('still', 27),
   ('even', 21),
   ('far', 20),
   ('nearly', 20),
   ('ever', 18)],
  'noun': [('congress', 131),
   ('year', 81),
   ('act', 72),
   ('government', 61),
   ('country', 57),
   ('union', 51),
   ('present', 50),
   ('war', 50),
   ('nation', 49),
   ('improvement', 46)],
  'pronouns': [('it', 250),
   ('them', 105),
   ('they', 86),
   ('we', 85),
   ('i', 28),
   ('he', 20),
   ('itself', 19),
   ('us', 19),
   ('themselves', 14),
   ('you', 14)],
  'verbs': [('be', 284),
   ('have', 36),
   ('continue', 10),
   ('produce', 7),
   ('provide', 7),
   ('receive', 7),
   ('require', 7),
   ('ascertain', 6),
   ('come', 6),
   ('contribute', 6)]},
 'vocabulary size': [4114, 3820]}

Andrew Jackson (1829-1836)


In [10]:
pos_stats('7AJ.txt')


Out[10]:
{'POS counts': {None: 61271,
  'adjectives': 5973,
  'adverbs': 3276,
  'noun': 15029,
  'pronouns': 2593,
  'verbs': 3517},
 'avg chars per word': 4.610466955355098,
 'number of capital letters': 6203,
 'stop word frequency': 0.5420761570261455,
 'top words by pos': {None: [('``', 16)],
  'adjectives': [('such', 223),
   ('other', 159),
   ('great', 146),
   ('general', 138),
   ('last', 119),
   ('necessary', 110),
   ('new', 84),
   ('public', 84),
   ('same', 84),
   ('important', 83)],
  'adverbs': [('not', 546),
   ('so', 196),
   ('now', 128),
   ('well', 79),
   ('only', 74),
   ('however', 69),
   ('just', 67),
   ('ever', 65),
   ('thus', 56),
   ('yet', 56)],
  'noun': [('government', 431),
   ('congress', 236),
   ('country', 185),
   ('public', 181),
   ('state', 172),
   ('subject', 156),
   ('present', 146),
   ('power', 134),
   ('time', 134),
   ('treaty', 113)],
  'pronouns': [('it', 1028),
   ('i', 424),
   ('they', 268),
   ('them', 196),
   ('we', 176),
   ('you', 112),
   ('us', 100),
   ('me', 76),
   ('he', 57),
   ('him', 45)],
  'verbs': [('be', 1193),
   ('have', 123),
   ('make', 59),
   ('give', 33),
   ('receive', 33),
   ('pay', 30),
   ('require', 24),
   ('avoid', 21),
   ('secure', 21),
   ('expect', 20)]},
 'vocabulary size': [6294, 5925]}

Martin van Buren (1837-1840)


In [11]:
pos_stats('8MVB.txt')


Out[11]:
{'POS counts': {None: 32921,
  'adjectives': 3508,
  'adverbs': 1824,
  'noun': 7953,
  'pronouns': 1229,
  'verbs': 1561},
 'avg chars per word': 4.661721794845635,
 'number of capital letters': 3115,
 'stop word frequency': 0.5309037484440998,
 'top words by pos': {None: [('``', 7)],
  'adjectives': [('such', 110),
   ('general', 85),
   ('other', 85),
   ('public', 76),
   ('great', 72),
   ('last', 68),
   ('new', 66),
   ('own', 60),
   ('same', 60),
   ('large', 52)],
  'adverbs': [('not', 242),
   ('so', 120),
   ('now', 66),
   ('only', 62),
   ('also', 50),
   ('thus', 35),
   ('well', 33),
   ('still', 31),
   ('however', 29),
   ('yet', 29)],
  'noun': [('government', 255),
   ('public', 131),
   ('country', 110),
   ('congress', 93),
   ('state', 82),
   ('time', 78),
   ('subject', 63),
   ('treasury', 62),
   ('system', 61),
   ('year', 61)],
  'pronouns': [('it', 459),
   ('they', 174),
   ('them', 167),
   ('i', 164),
   ('we', 64),
   ('you', 52),
   ('us', 32),
   ('me', 30),
   ('themselves', 30),
   ('itself', 17)],
  'verbs': [('be', 509),
   ('have', 50),
   ('make', 21),
   ('maintain', 16),
   ('require', 16),
   ('prevent', 15),
   ('bring', 14),
   ('carry', 14),
   ('afford', 12),
   ('believe', 12)]},
 'vocabulary size': [4915, 4558]}

John Tyler (1841-1844)


In [12]:
pos_stats('10JT.txt')


Out[12]:
{'POS counts': {None: 24246,
  'adjectives': 2394,
  'adverbs': 1260,
  'noun': 6256,
  'pronouns': 872,
  'verbs': 1508},
 'avg chars per word': 4.588795533539506,
 'number of capital letters': 2459,
 'stop word frequency': 0.5375078683050987,
 'top words by pos': {None: [('``', 2)],
  'adjectives': [('great', 101),
   ('such', 100),
   ('other', 91),
   ('last', 51),
   ('same', 41),
   ('necessary', 36),
   ('many', 31),
   ('foreign', 29),
   ('own', 29),
   ('large', 28)],
  'adverbs': [('not', 183),
   ('so', 83),
   ('only', 48),
   ('thus', 40),
   ('well', 40),
   ('now', 32),
   ('however', 28),
   ('also', 26),
   ('far', 26),
   ('greatly', 21)],
  'noun': [('government', 209),
   ('congress', 85),
   ('country', 79),
   ('time', 67),
   ('public', 63),
   ('treasury', 59),
   ('state', 56),
   ('mexico', 51),
   ('amount', 50),
   ('war', 48)],
  'pronouns': [('it', 335),
   ('i', 139),
   ('them', 77),
   ('you', 58),
   ('we', 54),
   ('they', 48),
   ('us', 28),
   ('itself', 26),
   ('she', 20),
   ('he', 18)],
  'verbs': [('be', 456),
   ('have', 71),
   ('make', 21),
   ('do', 17),
   ('take', 14),
   ('receive', 13),
   ('but', 11),
   ('meet', 11),
   ('require', 11),
   ('fail', 10)]},
 'vocabulary size': [4278, 3955]}

James Polk (1845-1848)


In [13]:
pos_stats('11JP.txt')


Out[13]:
{'POS counts': {None: 52999,
  'adjectives': 5271,
  'adverbs': 2225,
  'noun': 12995,
  'pronouns': 2038,
  'verbs': 2920},
 'avg chars per word': 4.536808838708855,
 'number of capital letters': 6436,
 'stop word frequency': 0.5226765894001758,
 'top words by pos': {None: [('``', 126)],
  'adjectives': [('such', 196),
   ('other', 175),
   ('last', 130),
   ('great', 128),
   ('foreign', 99),
   ('new', 98),
   ('own', 93),
   ('same', 76),
   ('necessary', 73),
   ('public', 71)],
  'adverbs': [('not', 353),
   ('so', 120),
   ('just', 67),
   ('now', 64),
   ('only', 62),
   ('well', 56),
   ('thus', 51),
   ('still', 50),
   ('also', 45),
   ('ever', 37)],
  'noun': [('government', 323),
   ('mexico', 319),
   ('war', 251),
   ('congress', 221),
   ('country', 216),
   ('public', 139),
   ('power', 130),
   ('state', 112),
   ('act', 111),
   ('peace', 111)],
  'pronouns': [('it', 774),
   ('they', 269),
   ('them', 239),
   ('i', 168),
   ('he', 154),
   ('we', 151),
   ('him', 56),
   ('she', 47),
   ('themselves', 38),
   ('you', 34)],
  'verbs': [('be', 943),
   ('have', 123),
   ('make', 52),
   ('mexico', 33),
   ('become', 26),
   ('give', 25),
   ('pay', 20),
   ('meet', 19),
   ('remain', 19),
   ('take', 19)]},
 'vocabulary size': [5722, 5347]}

Zachary Taylor (1849)


In [14]:
pos_stats('12ZT.txt')


Out[14]:
{'POS counts': {None: 5411,
  'adjectives': 575,
  'adverbs': 211,
  'noun': 1468,
  'pronouns': 222,
  'verbs': 337},
 'avg chars per word': 4.661196207148067,
 'number of capital letters': 777,
 'stop word frequency': 0.524677850717238,
 'top words by pos': {None: [('``', 4)],
  'adjectives': [('such', 29),
   ('new', 18),
   ('other', 18),
   ('great', 13),
   ('necessary', 12),
   ('same', 12),
   ('american', 9),
   ('further', 9),
   ('republic', 9),
   ('secretary', 9)],
  'adverbs': [('not', 30),
   ('also', 10),
   ('now', 9),
   ('well', 8),
   ('so', 7),
   ('especially', 6),
   ('only', 5),
   ('respectfully', 5),
   ('still', 5),
   ('therefore', 5)],
  'noun': [('congress', 36),
   ('government', 34),
   ('treaty', 18),
   ('california', 15),
   ('country', 14),
   ('mexico', 14),
   ('state', 14),
   ('act', 13),
   ('constitution', 13),
   ('power', 13)],
  'pronouns': [('it', 77),
   ('i', 62),
   ('we', 20),
   ('them', 13),
   ('they', 12),
   ('he', 6),
   ('him', 6),
   ('us', 6),
   ('you', 6),
   ('me', 5)],
  'verbs': [('be', 102),
   ('have', 7),
   ('become', 6),
   ('maintain', 5),
   ('make', 5),
   ('do', 4),
   ('encourage', 3),
   ('examine', 3),
   ('fail', 3),
   ('give', 3)]},
 'vocabulary size': [1869, 1644]}

Millard Fillmore (1850-1852)


In [15]:
pos_stats('13MF.txt')


Out[15]:
{'POS counts': {None: 22890,
  'adjectives': 2290,
  'adverbs': 1025,
  'noun': 5831,
  'pronouns': 846,
  'verbs': 1178},
 'avg chars per word': 4.606316778208289,
 'number of capital letters': 2572,
 'stop word frequency': 0.5303217095221322,
 'top words by pos': {None: [('``', 12)],
  'adjectives': [('such', 101),
   ('last', 82),
   ('other', 63),
   ('foreign', 56),
   ('great', 50),
   ('necessary', 40),
   ('own', 40),
   ('new', 34),
   ('same', 33),
   ('annual', 29)],
  'adverbs': [('not', 120),
   ('so', 56),
   ('also', 37),
   ('however', 32),
   ('now', 31),
   ('only', 25),
   ('well', 24),
   ('very', 17),
   ('yet', 17),
   ('far', 16)],
  'noun': [('government', 131),
   ('country', 104),
   ('congress', 88),
   ('year', 75),
   ('subject', 59),
   ('law', 54),
   ('state', 54),
   ('department', 53),
   ('duty', 46),
   ('public', 40)],
  'pronouns': [('it', 295),
   ('i', 175),
   ('they', 89),
   ('them', 70),
   ('we', 52),
   ('you', 37),
   ('he', 33),
   ('us', 26),
   ('me', 24),
   ('him', 10)],
  'verbs': [('be', 396),
   ('make', 28),
   ('have', 27),
   ('give', 14),
   ('maintain', 12),
   ('protect', 11),
   ('become', 9),
   ('require', 9),
   ('carry', 8),
   ('see', 8)]},
 'vocabulary size': [4386, 4039]}

Franklin Pierce (1853-1856)


In [16]:
pos_stats('14FP.txt')


Out[16]:
{'POS counts': {None: 30140,
  'adjectives': 3455,
  'adverbs': 1534,
  'noun': 7886,
  'pronouns': 983,
  'verbs': 1365},
 'avg chars per word': 4.679355492858402,
 'number of capital letters': 3536,
 'stop word frequency': 0.514018691588785,
 'top words by pos': {None: [('``', 16)],
  'adjectives': [('such', 109),
   ('great', 105),
   ('other', 101),
   ('last', 65),
   ('general', 56),
   ('new', 52),
   ('public', 51),
   ('foreign', 50),
   ('same', 47),
   ('political', 46)],
  'adverbs': [('not', 232),
   ('so', 76),
   ('only', 72),
   ('thus', 58),
   ('now', 52),
   ('also', 39),
   ('well', 30),
   ('still', 28),
   ('yet', 26),
   ('far', 22)],
  'noun': [('government', 173),
   ('congress', 108),
   ('country', 87),
   ('union', 81),
   ('state', 76),
   ('time', 66),
   ('part', 65),
   ('subject', 65),
   ('law', 64),
   ('public', 63)],
  'pronouns': [('it', 422),
   ('i', 127),
   ('they', 113),
   ('them', 84),
   ('we', 41),
   ('he', 34),
   ('us', 32),
   ('me', 30),
   ('themselves', 24),
   ('you', 24)],
  'verbs': [('be', 350),
   ('have', 54),
   ('give', 18),
   ('make', 15),
   ('maintain', 14),
   ('take', 12),
   ('believe', 11),
   ('act', 9),
   ('continue', 9),
   ('meet', 9)]},
 'vocabulary size': [5047, 4688]}

James Buchanan (1857-1860)


In [17]:
pos_stats('15JB.txt')


Out[17]:
{'POS counts': {None: 40467,
  'adjectives': 4332,
  'adverbs': 2152,
  'noun': 10678,
  'pronouns': 1626,
  'verbs': 2448},
 'avg chars per word': 4.577206537408685,
 'number of capital letters': 5713,
 'stop word frequency': 0.520984174806032,
 'top words by pos': {None: [('``', 155)],
  'adjectives': [('such', 177),
   ('other', 140),
   ('last', 104),
   ('great', 85),
   ('own', 80),
   ('necessary', 72),
   ('same', 68),
   ('fiscal', 58),
   ('republic', 58),
   ('general', 50)],
  'adverbs': [('not', 323),
   ('so', 77),
   ('then', 60),
   ('now', 59),
   ('well', 48),
   ('however', 47),
   ('thus', 47),
   ('only', 44),
   ('never', 43),
   ('still', 43)],
  'noun': [('government', 277),
   ('congress', 222),
   ('constitution', 173),
   ('state', 156),
   ('power', 125),
   ('year', 115),
   ('country', 108),
   ('present', 100),
   ('union', 98),
   ('time', 97)],
  'pronouns': [('it', 575),
   ('i', 259),
   ('they', 215),
   ('them', 135),
   ('we', 131),
   ('he', 104),
   ('themselves', 33),
   ('us', 32),
   ('itself', 30),
   ('him', 26)],
  'verbs': [('be', 647),
   ('have', 143),
   ('make', 33),
   ('carry', 22),
   ('do', 22),
   ('protect', 22),
   ('take', 22),
   ('prevent', 20),
   ('employ', 19),
   ('secure', 18)]},
 'vocabulary size': [5345, 4964]}

Abraham Lincoln (1861-1864)


In [18]:
pos_stats('16AL.txt')


Out[18]:
{'POS counts': {None: 19972,
  'adjectives': 2224,
  'adverbs': 1284,
  'noun': 4904,
  'pronouns': 829,
  'verbs': 1034},
 'avg chars per word': 4.531990090834022,
 'number of capital letters': 2529,
 'stop word frequency': 0.5106853839801817,
 'top words by pos': {None: [('``', 8)],
  'adjectives': [('such', 82),
   ('other', 69),
   ('great', 60),
   ('last', 56),
   ('national', 49),
   ('new', 48),
   ('same', 39),
   ('general', 38),
   ('foreign', 37),
   ('free', 27)],
  'adverbs': [('not', 200),
   ('so', 66),
   ('now', 64),
   ('also', 39),
   ('only', 31),
   ('there', 26),
   ('much', 24),
   ('yet', 24),
   ('thus', 23),
   ('well', 23)],
  'noun': [('congress', 81),
   ('government', 75),
   ('year', 68),
   ('war', 65),
   ('country', 63),
   ('union', 54),
   ('time', 50),
   ('state', 45),
   ('part', 38),
   ('department', 36)],
  'pronouns': [('it', 305),
   ('i', 166),
   ('we', 106),
   ('them', 82),
   ('they', 55),
   ('us', 26),
   ('you', 19),
   ('he', 17),
   ('themselves', 14),
   ('itself', 13)],
  'verbs': [('be', 318),
   ('have', 34),
   ('make', 16),
   ('pay', 12),
   ('do', 11),
   ('give', 11),
   ('provide', 9),
   ('say', 9),
   ('take', 9),
   ('become', 8)]},
 'vocabulary size': [4240, 3896]}

Andrew Johnson (1865-1868)


In [19]:
pos_stats('17AJ.txt')


Out[19]:
{'POS counts': {None: 28088,
  'adjectives': 2859,
  'adverbs': 1493,
  'noun': 7137,
  'pronouns': 947,
  'verbs': 1367},
 'avg chars per word': 4.60599408227546,
 'number of capital letters': 3263,
 'stop word frequency': 0.5096878877541281,
 'top words by pos': {None: [('``', 40)],
  'adjectives': [('great', 84),
   ('national', 66),
   ('other', 66),
   ('last', 61),
   ('such', 61),
   ('own', 48),
   ('general', 46),
   ('free', 39),
   ('military', 36),
   ('political', 36)],
  'adverbs': [('not', 226),
   ('so', 90),
   ('now', 62),
   ('only', 43),
   ('however', 30),
   ('just', 28),
   ('well', 28),
   ('thus', 27),
   ('long', 25),
   ('much', 21)],
  'noun': [('government', 207),
   ('constitution', 115),
   ('congress', 110),
   ('war', 96),
   ('country', 91),
   ('year', 86),
   ('union', 68),
   ('state', 64),
   ('power', 59),
   ('time', 57)],
  'pronouns': [('it', 390),
   ('they', 106),
   ('we', 102),
   ('i', 94),
   ('them', 75),
   ('us', 52),
   ('he', 34),
   ('themselves', 25),
   ('itself', 21),
   ('you', 14)],
  'verbs': [('be', 449),
   ('have', 60),
   ('become', 18),
   ('make', 16),
   ('protect', 13),
   ('give', 12),
   ('let', 11),
   ('take', 11),
   ('preserve', 10),
   ('require', 10)]},
 'vocabulary size': [4938, 4592]}

Ulysses S. Grant (1869-1876)


In [20]:
pos_stats('18USG.txt')


Out[20]:
{'POS counts': {None: 48281,
  'adjectives': 4701,
  'adverbs': 2312,
  'noun': 12049,
  'pronouns': 1578,
  'verbs': 2425},
 'avg chars per word': 4.555521309086833,
 'number of capital letters': 6131,
 'stop word frequency': 0.5140379388607134,
 'top words by pos': {None: [('``', 40)],
  'adjectives': [('such', 187),
   ('other', 149),
   ('great', 133),
   ('last', 125),
   ('foreign', 74),
   ('necessary', 67),
   ('large', 62),
   ('same', 60),
   ('new', 56),
   ('many', 54)],
  'adverbs': [('not', 346),
   ('so', 141),
   ('now', 116),
   ('also', 67),
   ('only', 64),
   ('however', 56),
   ('thus', 50),
   ('far', 47),
   ('well', 45),
   ('much', 42)],
  'noun': [('government', 287),
   ('congress', 241),
   ('year', 171),
   ('country', 138),
   ('time', 121),
   ('part', 96),
   ('report', 91),
   ('attention', 88),
   ('law', 87),
   ('state', 87)],
  'pronouns': [('it', 580),
   ('i', 425),
   ('they', 148),
   ('them', 133),
   ('we', 85),
   ('me', 43),
   ('us', 39),
   ('he', 28),
   ('itself', 21),
   ('you', 21)],
  'verbs': [('be', 781),
   ('have', 84),
   ('secure', 42),
   ('make', 38),
   ('take', 28),
   ('give', 27),
   ('become', 26),
   ('provide', 26),
   ('do', 20),
   ('receive', 17)]},
 'vocabulary size': [6270, 5891]}

Rutherford B. Hayes (1877-1880)


In [21]:
pos_stats('19RBH.txt')


Out[21]:
{'POS counts': {None: 24966,
  'adjectives': 2734,
  'adverbs': 1119,
  'noun': 6694,
  'pronouns': 591,
  'verbs': 1116},
 'avg chars per word': 4.70405285633712,
 'number of capital letters': 2983,
 'stop word frequency': 0.503907823704778,
 'top words by pos': {None: [('``', 13)],
  'adjectives': [('such', 100),
   ('other', 70),
   ('great', 52),
   ('last', 49),
   ('general', 46),
   ('fiscal', 44),
   ('public', 38),
   ('national', 35),
   ('important', 32),
   ('new', 32)],
  'adverbs': [('not', 122),
   ('now', 59),
   ('so', 51),
   ('also', 37),
   ('only', 33),
   ('well', 26),
   ('however', 22),
   ('very', 22),
   ('just', 20),
   ('still', 18)],
  'noun': [('government', 154),
   ('congress', 146),
   ('year', 113),
   ('country', 103),
   ('service', 82),
   ('public', 73),
   ('attention', 56),
   ('interest', 50),
   ('report', 47),
   ('subject', 47)],
  'pronouns': [('it', 254),
   ('i', 147),
   ('them', 55),
   ('they', 55),
   ('we', 20),
   ('themselves', 15),
   ('me', 12),
   ('he', 11),
   ('itself', 7),
   ('you', 7)],
  'verbs': [('be', 384),
   ('have', 25),
   ('make', 24),
   ('secure', 13),
   ('become', 11),
   ('receive', 11),
   ('take', 11),
   ('give', 10),
   ('pay', 10),
   ('provide', 10)]},
 'vocabulary size': [4430, 4126]}

Chester A. Arthur (1881-1884)


In [22]:
pos_stats('21CA.txt')


Out[22]:
{'POS counts': {None: 14250,
  'adjectives': 1591,
  'adverbs': 570,
  'noun': 3851,
  'pronouns': 343,
  'verbs': 557},
 'avg chars per word': 4.831955792754924,
 'number of capital letters': 2361,
 'stop word frequency': 0.485807396212157,
 'top words by pos': {None: [('``', 1)],
  'adjectives': [('such', 68),
   ('foreign', 36),
   ('other', 36),
   ('american', 33),
   ('last', 30),
   ('national', 29),
   ('international', 25),
   ('diplomatic', 22),
   ('general', 22),
   ('commercial', 20)],
  'adverbs': [('not', 63),
   ('now', 43),
   ('so', 24),
   ('thus', 20),
   ('already', 15),
   ('also', 15),
   ('lately', 15),
   ('july', 14),
   ('only', 12),
   ('still', 12)],
  'noun': [('government', 129),
   ('congress', 63),
   ('treaty', 46),
   ('country', 43),
   ('year', 43),
   ('intercourse', 26),
   ('subject', 26),
   ('attention', 25),
   ('state', 24),
   ('revenue', 23)],
  'pronouns': [('it', 113),
   ('i', 111),
   ('they', 27),
   ('you', 20),
   ('we', 19),
   ('them', 14),
   ('me', 11),
   ('he', 6),
   ('us', 6),
   ('itself', 5)],
  'verbs': [('be', 195),
   ('make', 15),
   ('provide', 12),
   ('become', 7),
   ('meet', 7),
   ('secure', 7),
   ('attend', 5),
   ('result', 5),
   ('send', 5),
   ('believe', 4)]},
 'vocabulary size': [3702, 3416]}

Grover Cleveland (1885-1888, 1893-1896)


In [23]:
pos_stats('22GC.txt')


Out[23]:
{'POS counts': {None: 78023,
  'adjectives': 8176,
  'adverbs': 3803,
  'noun': 21330,
  'pronouns': 1890,
  'verbs': 3200},
 'avg chars per word': 4.734436415440861,
 'number of capital letters': 8373,
 'stop word frequency': 0.500660819416076,
 'top words by pos': {None: [('``', 18)],
  'adjectives': [('such', 401),
   ('other', 195),
   ('last', 194),
   ('fiscal', 126),
   ('great', 119),
   ('american', 111),
   ('necessary', 100),
   ('foreign', 97),
   ('public', 97),
   ('large', 93)],
  'adverbs': [('not', 499),
   ('now', 169),
   ('so', 146),
   ('only', 129),
   ('thus', 116),
   ('also', 79),
   ('well', 75),
   ('far', 71),
   ('however', 65),
   ('still', 61)],
  'noun': [('government', 561),
   ('year', 433),
   ('congress', 208),
   ('present', 173),
   ('country', 169),
   ('gold', 160),
   ('law', 156),
   ('time', 156),
   ('service', 154),
   ('treasury', 141)],
  'pronouns': [('it', 688),
   ('i', 363),
   ('they', 238),
   ('we', 173),
   ('them', 156),
   ('he', 74),
   ('us', 73),
   ('me', 30),
   ('themselves', 26),
   ('him', 21)],
  'verbs': [('be', 1172),
   ('have', 65),
   ('make', 47),
   ('meet', 45),
   ('prevent', 30),
   ('secure', 30),
   ('pay', 27),
   ('maintain', 24),
   ('give', 23),
   ('protect', 19)]},
 'vocabulary size': [9253, 8858]}

Benjamin Harrison (1889-1892)


In [24]:
pos_stats('23BH.txt')


Out[24]:
{'POS counts': {None: 39764,
  'adjectives': 4069,
  'adverbs': 1942,
  'noun': 10366,
  'pronouns': 1042,
  'verbs': 1783},
 'avg chars per word': 4.5993831449439915,
 'number of capital letters': 4913,
 'stop word frequency': 0.5115660323001576,
 'top words by pos': {None: [('``', 25)],
  'adjectives': [('such', 142),
   ('great', 127),
   ('last', 117),
   ('new', 102),
   ('general', 89),
   ('other', 86),
   ('american', 68),
   ('large', 65),
   ('same', 59),
   ('foreign', 52)],
  'adverbs': [('not', 332),
   ('now', 118),
   ('so', 97),
   ('very', 84),
   ('only', 74),
   ('well', 33),
   ('there', 29),
   ('just', 27),
   ('nearly', 27),
   ('also', 24)],
  'noun': [('government', 208),
   ('year', 182),
   ('congress', 147),
   ('law', 128),
   ('work', 93),
   ('legislation', 92),
   ('increase', 82),
   ('time', 72),
   ('value', 72),
   ('department', 71)],
  'pronouns': [('it', 360),
   ('i', 262),
   ('we', 112),
   ('them', 86),
   ('they', 80),
   ('he', 34),
   ('us', 33),
   ('me', 26),
   ('him', 17),
   ('themselves', 11)],
  'verbs': [('be', 619),
   ('have', 63),
   ('secure', 38),
   ('give', 33),
   ('make', 29),
   ('promote', 20),
   ('take', 18),
   ('continue', 13),
   ('doubt', 12),
   ('maintain', 12)]},
 'vocabulary size': [5985, 5653]}

William McKinley (1897-1900)


In [25]:
pos_stats('25WM.txt')


Out[25]:
{'POS counts': {None: 48509,
  'adjectives': 5456,
  'adverbs': 1913,
  'noun': 13685,
  'pronouns': 1042,
  'verbs': 2109},
 'avg chars per word': 4.686108019729882,
 'number of capital letters': 7440,
 'stop word frequency': 0.4842202162592913,
 'top words by pos': {None: [('``', 21)],
  'adjectives': [('such', 129),
   ('other', 111),
   ('american', 101),
   ('general', 93),
   ('great', 92),
   ('international', 84),
   ('last', 79),
   ('new', 74),
   ('foreign', 67),
   ('military', 67)],
  'adverbs': [('not', 264),
   ('so', 97),
   ('now', 82),
   ('only', 66),
   ('july', 47),
   ('thus', 39),
   ('already', 34),
   ('also', 32),
   ('then', 31),
   ('well', 31)],
  'noun': [('government', 398),
   ('congress', 187),
   ('year', 125),
   ('war', 114),
   ('act', 111),
   ('country', 104),
   ('commission', 101),
   ('cuba', 99),
   ('time', 79),
   ('part', 72)],
  'pronouns': [('it', 422),
   ('i', 216),
   ('we', 94),
   ('they', 87),
   ('them', 62),
   ('us', 36),
   ('he', 26),
   ('you', 22),
   ('him', 18),
   ('itself', 14)],
  'verbs': [('be', 671),
   ('have', 59),
   ('make', 39),
   ('give', 27),
   ('take', 26),
   ('provide', 25),
   ('meet', 22),
   ('continue', 18),
   ('secure', 17),
   ('spain', 15)]},
 'vocabulary size': [7409, 7022]}

Theodore Roosevelt (1901-1908)


In [26]:
pos_stats('26TR.txt')


Out[26]:
{'POS counts': {None: 109810,
  'adjectives': 13197,
  'adverbs': 7062,
  'noun': 29831,
  'pronouns': 4264,
  'verbs': 6935},
 'avg chars per word': 4.519374499640618,
 'number of capital letters': 12236,
 'stop word frequency': 0.5180830611183507,
 'top words by pos': {None: [('``', 76)],
  'adjectives': [('such', 603),
   ('great', 381),
   ('other', 331),
   ('national', 251),
   ('good', 213),
   ('many', 171),
   ('necessary', 160),
   ('own', 160),
   ('possible', 158),
   ('public', 154)],
  'adverbs': [('not', 912),
   ('so', 390),
   ('only', 291),
   ('now', 265),
   ('far', 152),
   ('well', 139),
   ('also', 128),
   ('just', 120),
   ('there', 116),
   ('even', 115)],
  'noun': [('government', 504),
   ('law', 384),
   ('congress', 308),
   ('country', 300),
   ('business', 261),
   ('nation', 248),
   ('work', 244),
   ('man', 242),
   ('power', 214),
   ('service', 211)],
  'pronouns': [('it', 1695),
   ('we', 644),
   ('they', 502),
   ('i', 430),
   ('them', 357),
   ('he', 172),
   ('us', 138),
   ('themselves', 100),
   ('itself', 76),
   ('him', 63)],
  'verbs': [('be', 2218),
   ('have', 177),
   ('do', 153),
   ('make', 148),
   ('secure', 97),
   ('take', 92),
   ('prevent', 72),
   ('give', 67),
   ('see', 63),
   ('provide', 53)]},
 'vocabulary size': [10198, 9761]}

William H. Taft (1909-1912)


In [27]:
pos_stats('27WT.txt')


Out[27]:
{'POS counts': {None: 48794,
  'adjectives': 5936,
  'adverbs': 2218,
  'noun': 14325,
  'pronouns': 1381,
  'verbs': 2364},
 'avg chars per word': 4.713081011949962,
 'number of capital letters': 12397,
 'stop word frequency': 0.49694256824267613,
 'top words by pos': {None: [('``', 19)],
  'adjectives': [('such', 208),
   ('american', 197),
   ('other', 159),
   ('foreign', 157),
   ('great', 143),
   ('international', 95),
   ('new', 95),
   ('last', 93),
   ('national', 90),
   ('general', 73)],
  'adverbs': [('not', 258),
   ('now', 128),
   ('so', 97),
   ('only', 69),
   ('thus', 65),
   ('also', 64),
   ('well', 48),
   ('very', 45),
   ('already', 41),
   ('there', 37)],
  'noun': [('government', 344),
   ('congress', 189),
   ('department', 177),
   ('law', 138),
   ('year', 138),
   ('state', 114),
   ('country', 111),
   ('system', 104),
   ('service', 99),
   ('time', 90)],
  'pronouns': [('it', 528),
   ('i', 372),
   ('they', 116),
   ('we', 112),
   ('them', 80),
   ('me', 40),
   ('he', 32),
   ('itself', 28),
   ('you', 21),
   ('us', 18)],
  'verbs': [('be', 686),
   ('have', 74),
   ('make', 62),
   ('secure', 35),
   ('take', 31),
   ('give', 29),
   ('meet', 29),
   ('prevent', 25),
   ('do', 21),
   ('bring', 15)]},
 'vocabulary size': [7153, 6743]}

Woodrow Wilson (1913-1920)


In [28]:
pos_stats('28WW.txt')


Out[28]:
{'POS counts': {None: 23885,
  'adjectives': 2637,
  'adverbs': 1610,
  'noun': 5955,
  'pronouns': 1759,
  'verbs': 1786},
 'avg chars per word': 4.43157363373097,
 'number of capital letters': 2310,
 'stop word frequency': 0.5458168389170807,
 'top words by pos': {None: [('``', 10)],
  'adjectives': [('great', 99),
   ('own', 81),
   ('other', 74),
   ('such', 73),
   ('necessary', 61),
   ('last', 38),
   ('many', 36),
   ('possible', 35),
   ('whole', 34),
   ('national', 30)],
  'adverbs': [('not', 277),
   ('now', 76),
   ('only', 65),
   ('very', 57),
   ('also', 49),
   ('so', 48),
   ('even', 27),
   ('already', 25),
   ('no', 23),
   ('yet', 23)],
  'noun': [('government', 104),
   ('war', 95),
   ('country', 94),
   ('congress', 86),
   ('world', 79),
   ('peace', 70),
   ('present', 61),
   ('time', 56),
   ('nation', 54),
   ('matter', 47)],
  'pronouns': [('it', 457),
   ('we', 425),
   ('i', 287),
   ('they', 156),
   ('them', 129),
   ('us', 84),
   ('you', 68),
   ('me', 49),
   ('itself', 30),
   ('themselves', 30)],
  'verbs': [('be', 474),
   ('make', 66),
   ('have', 59),
   ('do', 41),
   ('take', 27),
   ('say', 22),
   ('give', 16),
   ('see', 16),
   ('serve', 16),
   ('find', 15)]},
 'vocabulary size': [4445, 4102]}

Warren Harding (1921-1922)


In [29]:
pos_stats('29WH.txt')


Out[29]:
{'POS counts': {None: 7714,
  'adjectives': 915,
  'adverbs': 522,
  'noun': 2291,
  'pronouns': 431,
  'verbs': 509},
 'avg chars per word': 4.70887507066139,
 'number of capital letters': 829,
 'stop word frequency': 0.499152063312606,
 'top words by pos': {None: [('``', 3)],
  'adjectives': [('american', 31),
   ('such', 27),
   ('public', 22),
   ('federal', 19),
   ('national', 19),
   ('possible', 18),
   ('other', 16),
   ('necessary', 15),
   ('own', 15),
   ('republic', 14)],
  'adverbs': [('not', 94),
   ('so', 44),
   ('well', 27),
   ('very', 16),
   ('now', 15),
   ('only', 14),
   ('just', 11),
   ('much', 11),
   ('there', 11),
   ('already', 8)],
  'noun': [('government', 39),
   ('world', 38),
   ('congress', 29),
   ('war', 28),
   ('labor', 24),
   ('law', 22),
   ('transportation', 21),
   ('policy', 18),
   ('public', 17),
   ('railway', 17)],
  'pronouns': [('we', 141),
   ('it', 135),
   ('i', 59),
   ('they', 21),
   ('you', 18),
   ('us', 16),
   ('he', 10),
   ('itself', 7),
   ('them', 7),
   ('themselves', 7)],
  'verbs': [('be', 124),
   ('have', 17),
   ('make', 14),
   ('meet', 11),
   ('provide', 8),
   ('give', 7),
   ('believe', 6),
   ('do', 6),
   ('say', 6),
   ('assume', 5)]},
 'vocabulary size': [2607, 2326]}

Calvin Coolidge (1923-1928)


In [30]:
pos_stats('30CC.txt')


Out[30]:
{'POS counts': {None: 35706,
  'adjectives': 4233,
  'adverbs': 1940,
  'noun': 10467,
  'pronouns': 1542,
  'verbs': 2297},
 'avg chars per word': 4.707702435813035,
 'number of capital letters': 6874,
 'stop word frequency': 0.5040656192729925,
 'top words by pos': {None: [('``', 1)],
  'adjectives': [('such', 128),
   ('national', 122),
   ('other', 119),
   ('great', 98),
   ('federal', 87),
   ('necessary', 80),
   ('own', 79),
   ('public', 77),
   ('many', 69),
   ('large', 64)],
  'adverbs': [('not', 361),
   ('so', 81),
   ('only', 67),
   ('now', 63),
   ('already', 48),
   ('very', 46),
   ('well', 44),
   ('much', 40),
   ('also', 38),
   ('still', 37)],
  'noun': [('government', 267),
   ('country', 160),
   ('congress', 159),
   ('law', 99),
   ('present', 86),
   ('legislation', 84),
   ('time', 80),
   ('service', 79),
   ('war', 75),
   ('year', 74)],
  'pronouns': [('it', 629),
   ('we', 374),
   ('i', 171),
   ('they', 167),
   ('them', 62),
   ('he', 38),
   ('us', 33),
   ('themselves', 22),
   ('him', 14),
   ('me', 12)],
  'verbs': [('be', 758),
   ('have', 99),
   ('provide', 50),
   ('make', 41),
   ('meet', 34),
   ('continue', 32),
   ('secure', 28),
   ('maintain', 19),
   ('protect', 19),
   ('give', 18)]},
 'vocabulary size': [5736, 5362]}

Herbert Hoover (1929-1932)


In [31]:
pos_stats('31HH.txt')


Out[31]:
{'POS counts': {None: 17859,
  'adjectives': 2330,
  'adverbs': 803,
  'noun': 5200,
  'pronouns': 569,
  'verbs': 904},
 'avg chars per word': 4.887845403648185,
 'number of capital letters': 3684,
 'stop word frequency': 0.4817048943471194,
 'top words by pos': {None: [('``', 13)],
  'adjectives': [('such', 103),
   ('federal', 91),
   ('other', 77),
   ('economic', 63),
   ('public', 57),
   ('national', 56),
   ('many', 48),
   ('large', 39),
   ('further', 38),
   ('necessary', 32)],
  'adverbs': [('not', 92),
   ('now', 46),
   ('however', 29),
   ('also', 25),
   ('about', 23),
   ('so', 20),
   ('only', 19),
   ('thus', 17),
   ('again', 16),
   ('even', 16)],
  'noun': [('government', 116),
   ('congress', 98),
   ('year', 79),
   ('system', 56),
   ('construction', 55),
   ('country', 54),
   ('action', 51),
   ('time', 40),
   ('world', 40),
   ('state', 37)],
  'pronouns': [('it', 153),
   ('i', 146),
   ('we', 129),
   ('they', 45),
   ('them', 33),
   ('us', 21),
   ('itself', 12),
   ('me', 10),
   ('he', 9),
   ('themselves', 6)],
  'verbs': [('be', 266),
   ('have', 27),
   ('meet', 24),
   ('make', 20),
   ('give', 12),
   ('recommend', 9),
   ('secure', 9),
   ('otherwise', 8),
   ('assure', 7),
   ('develop', 7)]},
 'vocabulary size': [3993, 3682]}

Franklin D. Roosevelt (1934-1945)


In [32]:
pos_stats('32FDR.txt')


Out[32]:
{'POS counts': {None: 33437,
  'adjectives': 4164,
  'adverbs': 1878,
  'noun': 8970,
  'pronouns': 1987,
  'verbs': 2033},
 'avg chars per word': 4.421138830698914,
 'number of capital letters': 4228,
 'stop word frequency': 0.5006855836983432,
 'top words by pos': {None: [('``', 61)],
  'adjectives': [('national', 132),
   ('other', 125),
   ('new', 104),
   ('many', 95),
   ('great', 90),
   ('own', 84),
   ('such', 75),
   ('american', 65),
   ('economic', 57),
   ('first', 53)],
  'adverbs': [('not', 368),
   ('now', 81),
   ('only', 76),
   ('so', 58),
   ('even', 47),
   ('well', 41),
   ('also', 37),
   ('very', 32),
   ('here', 28),
   ('never', 28)],
  'noun': [('war', 242),
   ('world', 177),
   ('government', 158),
   ('nation', 142),
   ('congress', 114),
   ('peace', 111),
   ('time', 89),
   ('year', 85),
   ('production', 74),
   ('power', 72)],
  'pronouns': [('we', 710),
   ('it', 345),
   ('i', 283),
   ('they', 211),
   ('us', 121),
   ('them', 97),
   ('you', 87),
   ('itself', 33),
   ('themselves', 31),
   ('he', 25)],
  'verbs': [('be', 356),
   ('have', 59),
   ('make', 50),
   ('do', 35),
   ('give', 31),
   ('provide', 27),
   ('say', 27),
   ('work', 26),
   ('continue', 24),
   ('take', 24)]},
 'vocabulary size': [5665, 5282]}

Harry S. Truman (1946-1953)


In [33]:
pos_stats('33HT.txt')


Out[33]:
{'POS counts': {None: 46766,
  'adjectives': 6045,
  'adverbs': 2398,
  'noun': 12840,
  'pronouns': 2370,
  'verbs': 3081},
 'avg chars per word': 4.566929240798587,
 'number of capital letters': 7561,
 'stop word frequency': 0.47407306804291727,
 'top words by pos': {None: [('``', 30)],
  'adjectives': [('free', 170),
   ('economic', 163),
   ('fiscal', 142),
   ('other', 142),
   ('national', 139),
   ('great', 138),
   ('new', 124),
   ('federal', 111),
   ('many', 94),
   ('full', 81)],
  'adverbs': [('not', 339),
   ('now', 167),
   ('also', 91),
   ('only', 77),
   ('so', 68),
   ('still', 64),
   ('already', 43),
   ('however', 43),
   ('well', 40),
   ('far', 39)],
  'noun': [('world', 312),
   ('war', 295),
   ('year', 259),
   ('government', 218),
   ('congress', 187),
   ('program', 165),
   ('production', 114),
   ('time', 110),
   ('power', 107),
   ('peace', 100)],
  'pronouns': [('we', 1058),
   ('it', 433),
   ('i', 337),
   ('they', 192),
   ('us', 138),
   ('them', 78),
   ('me', 31),
   ('he', 28),
   ('you', 28),
   ('themselves', 20)],
  'verbs': [('be', 604),
   ('have', 88),
   ('continue', 78),
   ('do', 62),
   ('make', 61),
   ('provide', 55),
   ('meet', 48),
   ('take', 47),
   ('help', 34),
   ('give', 33)]},
 'vocabulary size': [6222, 5818]}

Dwight D. Eisenhower (1953-1961)


In [34]:
pos_stats('34DE.txt')


Out[34]:
{'POS counts': {None: 37255,
  'adjectives': 5571,
  'adverbs': 2078,
  'noun': 11333,
  'pronouns': 1487,
  'verbs': 2395},
 'avg chars per word': 4.7670483820034235,
 'number of capital letters': 6246,
 'stop word frequency': 0.4521747801951236,
 'top words by pos': {None: [('``', 21)],
  'adjectives': [('federal', 169),
   ('new', 156),
   ('economic', 141),
   ('free', 127),
   ('military', 123),
   ('other', 111),
   ('national', 106),
   ('such', 83),
   ('own', 75),
   ('many', 71)],
  'adverbs': [('not', 180),
   ('now', 99),
   ('also', 81),
   ('so', 79),
   ('only', 72),
   ('still', 39),
   ('forward', 35),
   ('well', 32),
   ('already', 31),
   ('just', 31)],
  'noun': [('world', 200),
   ('government', 192),
   ('congress', 154),
   ('program', 146),
   ('security', 133),
   ('year', 131),
   ('nation', 127),
   ('peace', 126),
   ('defense', 101),
   ('freedom', 99)],
  'pronouns': [('we', 579),
   ('i', 344),
   ('it', 241),
   ('us', 105),
   ('they', 89),
   ('them', 54),
   ('you', 19),
   ('itself', 17),
   ('themselves', 15),
   ('me', 11)],
  'verbs': [('be', 459),
   ('make', 57),
   ('provide', 44),
   ('continue', 43),
   ('help', 40),
   ('have', 37),
   ('meet', 35),
   ('strengthen', 30),
   ('do', 29),
   ('maintain', 29)]},
 'vocabulary size': [6319, 5942]}

John F. Kennedy (1961-1963)


In [35]:
pos_stats('35JFK.txt')


Out[35]:
{'POS counts': {None: 12343,
  'adjectives': 1577,
  'adverbs': 704,
  'noun': 3510,
  'pronouns': 538,
  'verbs': 747},
 'avg chars per word': 4.410111202635914,
 'number of capital letters': 1918,
 'stop word frequency': 0.4651462108731466,
 'top words by pos': {None: [('``', 18)],
  'adjectives': [('new', 79),
   ('free', 47),
   ('other', 45),
   ('own', 43),
   ('national', 35),
   ('economic', 34),
   ('common', 21),
   ('first', 20),
   ('federal', 19),
   ('last', 19)],
  'adverbs': [('not', 138),
   ('now', 38),
   ('only', 31),
   ('too', 17),
   ('abroad', 15),
   ('never', 14),
   ('also', 13),
   ('instead', 13),
   ('long', 13),
   ('nearly', 13)],
  'noun': [('world', 71),
   ('year', 43),
   ('nation', 42),
   ('congress', 40),
   ('defense', 34),
   ('peace', 31),
   ('program', 31),
   ('tax', 31),
   ('freedom', 27),
   ('growth', 27)],
  'pronouns': [('we', 208),
   ('i', 107),
   ('it', 103),
   ('us', 42),
   ('they', 33),
   ('them', 17),
   ('you', 10),
   ('themselves', 6),
   ('he', 5),
   ('itself', 2)],
  'verbs': [('be', 120),
   ('make', 20),
   ('help', 19),
   ('increase', 14),
   ('provide', 12),
   ('afford', 9),
   ('continue', 9),
   ('expand', 9),
   ('have', 9),
   ('meet', 9)]},
 'vocabulary size': [3545, 3239]}

Lyndon B. Johnson (1964-1969)


In [36]:
pos_stats('36LBJ.txt')


Out[36]:
{'POS counts': {None: 20764,
  'adjectives': 2055,
  'adverbs': 1159,
  'noun': 5371,
  'pronouns': 1677,
  'verbs': 1659},
 'avg chars per word': 4.2541590214067275,
 'number of capital letters': 3526,
 'stop word frequency': 0.498348623853211,
 'top words by pos': {None: [('``', 28)],
  'adjectives': [('new', 94),
   ('last', 61),
   ('american', 55),
   ('great', 55),
   ('many', 53),
   ('other', 51),
   ('first', 37),
   ('federal', 35),
   ('national', 30),
   ('own', 27)],
  'adverbs': [('not', 156),
   ('now', 74),
   ('so', 56),
   ('only', 36),
   ('very', 32),
   ('also', 31),
   ('well', 31),
   ('already', 29),
   ('here', 27),
   ('just', 25)],
  'noun': [('congress', 114),
   ('year', 111),
   ('nation', 89),
   ('world', 73),
   ('america', 63),
   ('vietnam', 62),
   ('war', 53),
   ('peace', 52),
   ('time', 49),
   ('government', 43)],
  'pronouns': [('we', 659),
   ('i', 391),
   ('it', 216),
   ('you', 107),
   ('they', 93),
   ('us', 81),
   ('them', 59),
   ('he', 23),
   ('me', 19),
   ('themselves', 11)],
  'verbs': [('be', 191),
   ('help', 50),
   ('make', 41),
   ('do', 35),
   ('have', 32),
   ('continue', 31),
   ('provide', 22),
   ('give', 21),
   ('meet', 20),
   ('take', 20)]},
 'vocabulary size': [4062, 3723]}

Richard Nixon (1970-1974)


In [37]:
pos_stats('37RN.txt')


Out[37]:
{'POS counts': {None: 13917,
  'adjectives': 1464,
  'adverbs': 837,
  'noun': 3722,
  'pronouns': 1019,
  'verbs': 997},
 'avg chars per word': 4.245948652585579,
 'number of capital letters': 1969,
 'stop word frequency': 0.5143390386016023,
 'top words by pos': {None: [('``', 13)],
  'adjectives': [('new', 115),
   ('great', 65),
   ('american', 55),
   ('federal', 50),
   ('first', 32),
   ('other', 30),
   ('own', 27),
   ('full', 26),
   ('many', 21),
   ('last', 20)],
  'adverbs': [('not', 131),
   ('now', 53),
   ('so', 41),
   ('only', 35),
   ('also', 32),
   ('well', 21),
   ('ago', 20),
   ('here', 19),
   ('just', 19),
   ('together', 18)],
  'noun': [('america', 112),
   ('congress', 101),
   ('government', 90),
   ('world', 79),
   ('nation', 70),
   ('year', 67),
   ('time', 59),
   ('peace', 57),
   ('today', 41),
   ('war', 35)],
  'pronouns': [('we', 388),
   ('i', 211),
   ('it', 150),
   ('us', 86),
   ('they', 70),
   ('you', 30),
   ('them', 29),
   ('he', 14),
   ('themselves', 14),
   ('me', 12)],
  'verbs': [('be', 149),
   ('have', 40),
   ('make', 39),
   ('do', 28),
   ('let', 27),
   ('help', 25),
   ('meet', 23),
   ('achieve', 15),
   ('provide', 15),
   ('give', 14)]},
 'vocabulary size': [2882, 2575]}

Gerald R. Ford (1975-1977)


In [38]:
pos_stats('38GRF.txt')


Out[38]:
{'POS counts': {None: 9515,
  'adjectives': 1311,
  'adverbs': 528,
  'noun': 2707,
  'pronouns': 666,
  'verbs': 639},
 'avg chars per word': 4.45172817047817,
 'number of capital letters': 1551,
 'stop word frequency': 0.4616034303534304,
 'top words by pos': {None: [('``', 7)],
  'adjectives': [('federal', 64),
   ('new', 60),
   ('many', 35),
   ('economic', 24),
   ('foreign', 23),
   ('good', 20),
   ('national', 18),
   ('american', 17),
   ('domestic', 17),
   ('great', 17)],
  'adverbs': [('not', 72),
   ('now', 33),
   ('only', 21),
   ('also', 18),
   ('still', 18),
   ('so', 16),
   ('too', 15),
   ('very', 12),
   ('again', 11),
   ('here', 11)],
  'noun': [('world', 57),
   ('congress', 54),
   ('energy', 54),
   ('year', 52),
   ('government', 44),
   ('president', 42),
   ('america', 38),
   ('state', 32),
   ('tax', 32),
   ('union', 30)],
  'pronouns': [('we', 236),
   ('i', 213),
   ('it', 98),
   ('us', 29),
   ('they', 28),
   ('you', 25),
   ('them', 15),
   ('me', 14),
   ('he', 4),
   ('him', 2)],
  'verbs': [('be', 83),
   ('help', 17),
   ('make', 14),
   ('have', 13),
   ('achieve', 11),
   ('work', 11),
   ('do', 9),
   ('go', 9),
   ('reduce', 9),
   ('take', 9)]},
 'vocabulary size': [2807, 2515]}

Jimmy Carter (1978-1981)


In [39]:
pos_stats('39JC.txt')


Out[39]:
{'POS counts': {None: 30622,
  'adjectives': 4566,
  'adverbs': 1375,
  'noun': 9883,
  'pronouns': 1263,
  'verbs': 1892},
 'avg chars per word': 4.80682459108855,
 'number of capital letters': 8219,
 'stop word frequency': 0.43141165095479816,
 'top words by pos': {None: [('``', 28)],
  'adjectives': [('new', 156),
   ('federal', 121),
   ('economic', 110),
   ('national', 102),
   ('other', 88),
   ('major', 79),
   ('international', 74),
   ('american', 68),
   ('first', 63),
   ('nuclear', 63)],
  'adverbs': [('not', 155),
   ('also', 101),
   ('now', 65),
   ('well', 42),
   ('so', 34),
   ('together', 34),
   ('only', 33),
   ('again', 21),
   ('even', 19),
   ('ever', 19)],
  'noun': [('world', 162),
   ('congress', 148),
   ('administration', 144),
   ('government', 143),
   ('year', 129),
   ('nation', 117),
   ('policy', 114),
   ('energy', 110),
   ('program', 104),
   ('security', 100)],
  'pronouns': [('we', 589),
   ('i', 285),
   ('it', 169),
   ('us', 74),
   ('they', 57),
   ('you', 33),
   ('them', 26),
   ('me', 11),
   ('itself', 6),
   ('themselves', 6)],
  'verbs': [('be', 230),
   ('continue', 67),
   ('provide', 61),
   ('help', 59),
   ('have', 43),
   ('make', 40),
   ('meet', 33),
   ('work', 30),
   ('increase', 28),
   ('take', 28)]},
 'vocabulary size': [5732, 5385]}

Ronald Reagan (1982-1988)


In [40]:
pos_stats('40RR.txt')


Out[40]:
{'POS counts': {None: 22849,
  'adjectives': 2505,
  'adverbs': 1503,
  'noun': 6578,
  'pronouns': 1698,
  'verbs': 1678},
 'avg chars per word': 4.30137916055818,
 'number of capital letters': 3623,
 'stop word frequency': 0.4622631264592496,
 'top words by pos': {None: [('``', 43)],
  'adjectives': [('new', 77),
   ('federal', 71),
   ('economic', 65),
   ('free', 63),
   ('american', 56),
   ('last', 44),
   ('great', 40),
   ('national', 38),
   ('next', 38),
   ('many', 35)],
  'adverbs': [('not', 196),
   ('now', 85),
   ('so', 73),
   ('well', 58),
   ("n't", 53),
   ('just', 52),
   ('only', 47),
   ('again', 45),
   ('also', 45),
   ('together', 42)],
  'noun': [('america', 160),
   ('government', 117),
   ('freedom', 91),
   ('world', 88),
   ('year', 80),
   ('congress', 71),
   ('peace', 69),
   ('time', 66),
   ('budget', 64),
   ('tax', 63)],
  'pronouns': [('we', 665),
   ('i', 257),
   ('it', 234),
   ('us', 154),
   ('you', 132),
   ('they', 100),
   ('them', 62),
   ('me', 34),
   ('he', 29),
   ('she', 10)],
  'verbs': [('be', 196),
   ('let', 52),
   ('make', 50),
   ('do', 46),
   ('help', 37),
   ('have', 34),
   ('work', 27),
   ('take', 26),
   ('continue', 24),
   ('say', 22)]},
 'vocabulary size': [4789, 4434]}

George H.W. Bush (1989-1992)


In [41]:
pos_stats('41GHB.txt')


Out[41]:
{'POS counts': {None: 12339,
  'adjectives': 1239,
  'adverbs': 747,
  'noun': 3385,
  'pronouns': 1075,
  'verbs': 976},
 'avg chars per word': 4.103764039259334,
 'number of capital letters': 2185,
 'stop word frequency': 0.4760700192249317,
 'top words by pos': {None: [('``', 33)],
  'adjectives': [('new', 66),
   ('american', 33),
   ('future', 28),
   ('federal', 24),
   ('economic', 23),
   ('free', 22),
   ('great', 20),
   ('first', 19),
   ('own', 18),
   ('good', 16)],
  'adverbs': [('not', 103),
   ('now', 49),
   ('so', 37),
   ("n't", 26),
   ('here', 24),
   ('just', 22),
   ('there', 20),
   ('never', 17),
   ('only', 17),
   ('too', 16)],
  'noun': [('america', 83),
   ('world', 81),
   ('time', 48),
   ('nation', 40),
   ('budget', 38),
   ('plan', 35),
   ('year', 35),
   ('freedom', 32),
   ('congress', 30),
   ('government', 29)],
  'pronouns': [('we', 347),
   ('i', 245),
   ('it', 149),
   ('you', 98),
   ('us', 72),
   ('they', 63),
   ('them', 33),
   ('he', 28),
   ('me', 26),
   ('she', 7)],
  'verbs': [('be', 107),
   ('do', 40),
   ('let', 39),
   ('make', 33),
   ('have', 26),
   ('help', 25),
   ('get', 19),
   ('work', 16),
   ('take', 14),
   ('give', 13)]},
 'vocabulary size': [3227, 2913]}

William J. Clinton (1993-2000)


In [42]:
pos_stats('42WC.txt')


Out[42]:
{'POS counts': {None: 41171,
  'adjectives': 4056,
  'adverbs': 2572,
  'noun': 10981,
  'pronouns': 3908,
  'verbs': 3737},
 'avg chars per word': 4.19430609266142,
 'number of capital letters': 6725,
 'stop word frequency': 0.48009991423025417,
 'top words by pos': {None: [('``', 27)],
  'adjectives': [('new', 264),
   ('last', 123),
   ('first', 100),
   ('american', 99),
   ('other', 87),
   ('national', 75),
   ('good', 67),
   ('many', 67),
   ('next', 59),
   ('social', 56)],
  'adverbs': [('not', 257),
   ('now', 201),
   ('so', 147),
   ("n't", 133),
   ('here', 104),
   ('just', 92),
   ('also', 88),
   ('together', 67),
   ('still', 64),
   ('too', 57)],
  'noun': [('america', 235),
   ('year', 198),
   ('world', 145),
   ('care', 131),
   ('congress', 129),
   ('health', 123),
   ('government', 116),
   ('country', 112),
   ('time', 111),
   ('work', 97)],
  'pronouns': [('we', 1388),
   ('i', 743),
   ('it', 506),
   ('you', 391),
   ('they', 292),
   ('us', 198),
   ('them', 168),
   ('me', 73),
   ('he', 67),
   ('she', 38)],
  'verbs': [('be', 319),
   ('do', 177),
   ('make', 147),
   ('work', 106),
   ('help', 100),
   ('have', 95),
   ('let', 77),
   ('give', 75),
   ('say', 63),
   ('go', 56)]},
 'vocabulary size': [5700, 5329]}

George W. Bush (2001-2008)


In [43]:
pos_stats('43GWB.txt')


Out[43]:
{'POS counts': {None: 30910,
  'adjectives': 3030,
  'adverbs': 1562,
  'noun': 8788,
  'pronouns': 2166,
  'verbs': 2400},
 'avg chars per word': 4.319924704341777,
 'number of capital letters': 5701,
 'stop word frequency': 0.45682776118181445,
 'top words by pos': {None: [('``', 32)],
  'adjectives': [('new', 123),
   ('american', 82),
   ('many', 79),
   ('great', 70),
   ('own', 70),
   ('good', 69),
   ('other', 57),
   ('last', 49),
   ('social', 47),
   ('federal', 46)],
  'adverbs': [('not', 262),
   ('so', 142),
   ('now', 80),
   ('also', 74),
   ('yet', 55),
   ('together', 44),
   ('never', 41),
   ('here', 38),
   ('only', 37),
   ('just', 34)],
  'noun': [('america', 237),
   ('world', 139),
   ('country', 123),
   ('security', 113),
   ('nation', 103),
   ('congress', 96),
   ('freedom', 90),
   ('government', 90),
   ('iraq', 88),
   ('health', 83)],
  'pronouns': [('we', 872),
   ('i', 305),
   ('it', 217),
   ('you', 215),
   ('they', 177),
   ('us', 140),
   ('them', 120),
   ('he', 52),
   ('me', 33),
   ('she', 11)],
  'verbs': [('be', 196),
   ('help', 83),
   ('make', 74),
   ('do', 44),
   ('have', 41),
   ('take', 39),
   ('continue', 36),
   ('keep', 36),
   ('work', 35),
   ('protect', 34)]},
 'vocabulary size': [5409, 5058]}

Barack Obama (2009-2015)


In [44]:
pos_stats('44OB.txt')


Out[44]:
{'POS counts': {None: 34110,
  'adjectives': 3033,
  'adverbs': 2208,
  'noun': 8658,
  'pronouns': 2808,
  'verbs': 2631},
 'avg chars per word': 4.172150004675956,
 'number of capital letters': 5213,
 'stop word frequency': 0.47141120359113436,
 'top words by pos': {None: [('``', 51)],
  'adjectives': [('new', 194),
   ('american', 113),
   ('last', 81),
   ('first', 61),
   ('next', 57),
   ('many', 56),
   ('other', 51),
   ('own', 44),
   ('small', 42),
   ('same', 40)],
  'adverbs': [('not', 230),
   ("n't", 216),
   ('so', 151),
   ('now', 127),
   ('just', 87),
   ('here', 79),
   ('also', 77),
   ('back', 57),
   ('even', 55),
   ('still', 53)],
  'noun': [('america', 188),
   ('year', 119),
   ('time', 108),
   ('country', 105),
   ('economy', 98),
   ('world', 98),
   ('congress', 90),
   ('energy', 89),
   ('tax', 79),
   ('nation', 68)],
  'pronouns': [('we', 1029),
   ('i', 494),
   ('it', 420),
   ('they', 227),
   ('you', 211),
   ('us', 151),
   ('them', 90),
   ('me', 52),
   ('he', 49),
   ('she', 48)],
  'verbs': [('be', 173),
   ('do', 111),
   ('make', 97),
   ('help', 73),
   ('let', 69),
   ('get', 67),
   ('have', 61),
   ('work', 56),
   ('keep', 49),
   ('give', 44)]},
 'vocabulary size': [5284, 4927]}

In [ ]:


In [ ]: